######################################################/
## Anticipated Election Result and Protest Voting    ##
## -- Replication Material (Data Preparation Part 1) ##
## by: Christian Schimpf                             ##
## Version: July 01, 2019                            ##
## R-Version: 3.4.0.                                 ##
######################################################/

####
# NOTE 1: This R script recodes the variables selected from the
# original MEDW datasets and generates all derived variables
# that were used in the analyses.

####
# NOTE 2: In order to run the file properly, please set the working
# directory and any file paths accordingly. To identify the paths
# that need to be changed, search for "#ACHTUNG#" using
# Ctrl+F (Strg+F on a German keyboard).

####
# NOTE3 : Throughout the scripts, I make references to Party 1, Party 2 etc.
# The corresponding parties are:
# Party 1 = Conservative Party
# Party 2 = New Democratic Party
# Party 3 = Liberal Party 
# Party 4 = Bloc Quebecois
# Party 5 = Green Party


########################################################################/
#### Part 2 - Recodes and generating derivative variables        #### 
########################################################################/

# Clear the working space.
# NOTE(review): this removes every object of the calling session; run the
# script in a fresh R session if the workspace holds unsaved objects.
rm(list = ls())

# Set working directory
#ACHTUNG# (Change path to folder in which the three Making Electoral Democracy Work Datasets are located)
setwd("<FOLDER>/CJPS Replication Material")


# Load necessary packages.
# A package that is not installed yet is installed first. Every package is
# then attached with library(), which stops with an error when a package
# cannot be loaded (require() would only return FALSE and let the script
# continue without the package).
packages <- c("ggplot2","foreign","stargazer","car",
              "dplyr", "Zelig","ZeligChoice","nnet",
              "reshape2", "effects", "stargazer",
              "readstata13", "interplot", "margins",
              "cowplot", "gridExtra", "broom", "rstanarm")
# unique(): "stargazer" is listed twice; attach each package only once.
for (p in unique(packages)) {
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
  library(p, character.only = TRUE)
}

# Open dataset (the file is expected to provide the data frame `dfCan2015`
# that all statements below operate on).
load("dfCan2015.RData")

#############################################################################//
#### /// 2.A  Data preparation - Dependent Variable (NonSince_FoUp)      ####
#############################################################################//


## Generate Dependent Variable with ultimately three categories: ##
# 0 = Close to one party, voted for that party (Sincere Vote)
# 1 = Close to one party, voted for another party (Insincere Vote)
# 2 = Close to one party, did not vote at all (Abstention)
#(Note: The category abstention was initially coded here, but not used in any of the
# analyses - see paper for more information)


# First, identify the party that respondents rate highest (for an R solution
# to identifying the maximum value in given columns, see here:
# https://stackoverflow.com/questions/17735859/for-each-row-return-the-column-name-of-the-largest-value
# last accessed, March 1, 2018).
#
# The five party rating columns are addressed by name instead of by column
# position, so the result does not depend on the column order of the loaded
# data frame and always refers to the same variables that are recoded here
# and compared further below.
party_rating_cols <- paste0("Q17_party", 1:5)

# Recode the missing-value code 99 to NA in every party rating:
for (rating_col in party_rating_cols) {
  dfCan2015[[rating_col]][dfCan2015[[rating_col]] == 99] <- NA
}

party_ratings <- dfCan2015[party_rating_cols]

# TRUE for respondents who did not rate a single party:
no_party_rated <- rowSums(!is.na(party_ratings)) == 0

## Identify highest rated party (ties are broken randomly - will be recoded further below).
## Missing ratings are replaced by -Inf so they can never be the row maximum;
## respondents without any rating are set to NA afterwards.
top_party <- max.col(replace(party_ratings, is.na(party_ratings), -Inf),
                     ties.method = "random")
top_party[no_party_rated] <- NA
dfCan2015$PartyPref_string <- as.numeric(top_party)

## Generate Variable: Maximum rating given to any party (NA if no party was rated).
# The all-NA rows are handled explicitly, which avoids the warning and the
# -Inf result that max(..., na.rm = TRUE) produces for an empty set of values.
#http://r.789695.n4.nabble.com/Calculating-a-Maximum-for-a-row-or-column-with-NA-s-td2014630.html
dfCan2015$PartyPref_Max <- apply(party_ratings, 1, function(row_ratings) {
  if (all(is.na(row_ratings))) NA_real_ else max(row_ratings, na.rm = TRUE)
})


## Generate Variable for respondents who are NA for rating variables of ALL parties:
dfCan2015$PartyLikeDislike_NA_Only <- no_party_rated


## Identify cases in which respondents did not have a single peak preference
## but gave their maximum rating to at least two parties (Variable "Dupl"):
## Dupl = 1 marks a tie at the top; every other respondent stays NA.
dfCan2015$Dupl <- NA

# For each party, flag the respondents whose (non-missing) rating equals their
# maximum rating, then count these flags per respondent. Checking every pair
# of parties for "equal and at the maximum" is the same as asking whether at
# least two parties sit at the maximum.
n_parties_at_max <- Reduce(
  `+`,
  lapply(paste0("Q17_party", 1:5), function(rating_col) {
    rating <- dfCan2015[[rating_col]]
    !is.na(rating) & !is.na(dfCan2015$PartyPref_Max) &
      rating == dfCan2015$PartyPref_Max
  })
)

dfCan2015$Dupl <- ifelse(n_parties_at_max >= 2, 1, dfCan2015$Dupl)


# Recode all cases in which respondents did not evaluate a single party to NA:
dfCan2015$Dupl[dfCan2015$PartyLikeDislike_NA_Only %in% TRUE] <- NA

#### Based on this information, the party preference variable (PartyPref_FoUp) is generated.
#### It feeds the dependent variable coded further below. A follow-up question in the
#### dataset (Variable: Q18) is used to break the ties for those respondents who rated
#### at least two parties equally.

# Generate PartyPref_Random with codes for highest rated party (1="party 1",
# 2="party 2" etc.); ties in this variable were broken randomly:
dfCan2015$PartyPref_Random <- NA
for (party in as.numeric(1:5)) {
  dfCan2015$PartyPref_Random[dfCan2015$PartyPref_string == as.character(party)] <- party
}

# Generate Party Preference Variable: start from the highest rated party and
# blank out every respondent with a tie at the top (Dupl == 1).
dfCan2015$PartyPref_FoUp <- dfCan2015$PartyPref_Random
dfCan2015$PartyPref_FoUp[dfCan2015$Dupl == 1] <- NA

# Recode cases according to the follow-up question (Q18): a Q18 answer of
# 1-5 sets the preference to that party for every respondent who has such an
# answer; the Q18 codes 88 and 99 set the preference to NA.
for (party in as.numeric(1:5)) {
  dfCan2015$PartyPref_FoUp[dfCan2015$Q18 == party] <- party
}
dfCan2015$PartyPref_FoUp[dfCan2015$Q18 %in% c(88, 99)] <- NA


## Generate Dependent Variable "NonSince_FoUp"
# 0. Sincere Vote    (vote choice PQ6 equals the preferred party)
# 1. Insincere Vote  (vote choice PQ6 differs from the preferred party)
# 2. Abstention      (flagged by one of the two turnout variables)

# Start from a variable with only NA's
dfCan2015$NonSince_FoUp <- NA

# One pass per party code. Within a pass the rules run in the order
# sincere -> insincere -> abstention, so a later rule overwrites an earlier
# one for the same respondent. Respondents without a party preference are
# never touched and stay NA.
# NOTE(review): the original comments call the turnout variables PQ5_01 and
# PQ5_02, but the code reads the columns `PQ5_1` and `PQ5_02` - confirm the
# column names against the dataset. The meaning of the codes (PQ5_1 != 4 and
# PQ5_02 == 2 presumably identify non-voters) is not visible here - confirm
# against the codebook.
for (party in as.numeric(1:5)) {
  prefers_party <- dfCan2015$PartyPref_FoUp == party

  # Code 0s and 1s:
  dfCan2015$NonSince_FoUp[prefers_party & dfCan2015$PQ6 == party] <- 0
  dfCan2015$NonSince_FoUp[prefers_party & dfCan2015$PQ6 != party] <- 1

  # Code abstention category using the first post-election turnout variable:
  dfCan2015$NonSince_FoUp[prefers_party & dfCan2015$PQ5_1 != 4] <- 2

  # Code abstention category using the second post-election turnout variable:
  dfCan2015$NonSince_FoUp[prefers_party & dfCan2015$PQ5_02 == 2] <- 2
}

# Outcome variable - table:
table(dfCan2015$NonSince_FoUp)

#### Create dummy (Sincere Vote / Protest Vote) which combines insincere vote and
#### abstention into protest vote:

# Version 1: Follow Up used to divide ties in party ratings.
# The codes are mapped to their labels explicitly (0 -> "Sincere Vote",
# 1 -> "Protest Vote"). Assigning the labels by position after as.factor()
# would attach "Sincere Vote" to code 1 if code 0 did not occur in the data.
protest_code <- rep(NA_real_, nrow(dfCan2015))
protest_code[dfCan2015$NonSince_FoUp %in% 0] <- 0        # Sincere Voters
protest_code[dfCan2015$NonSince_FoUp %in% c(1, 2)] <- 1  # Insincere Voters and Abstainers
dfCan2015$NonSince_FoUp_Dum <- factor(protest_code,
                                      levels = c(0, 1),
                                      labels = c("Sincere Vote", "Protest Vote"))


#### Ensure that all dependent variables are factors:

dfCan2015$NonSince_FoUp <- as.factor(dfCan2015$NonSince_FoUp)
table(dfCan2015$NonSince_FoUp)
# Should yield the following Ns:
# 0=2514
# 1=811
# 2=201

############################################################/
#### /// 2.B  Data preparation - Independent Variables  ####
###########################################################/


###############################################################################################/
#### >>> WinChaPref: Relative Winning Chances of District Candidate - Most Preferred Party ####
###############################################################################################/

# This variable measures the winning chances that respondents gave, prior to
# the election, to the district candidate of their most preferred party
# (PartyPref_FoUp).

# Recode the missing-value code 99 of the original variables to NA:
win_chance_cols <- paste0("Q28_party", 1:5)
for (chance_col in win_chance_cols) {
  dfCan2015[[chance_col]] <- car::recode(dfCan2015[[chance_col]], "99=NA")
}

# Generate WinChaPref_FoUp variable (winning chance that respondents attribute
# to the district candidate of their most preferred party). Respondents
# without a party preference stay NA.
dfCan2015$WinChaPref_FoUp <- NA
has_preference <- !is.na(dfCan2015$PartyPref_FoUp)
for (party in 1:5) {
  dfCan2015$WinChaPref_FoUp <- ifelse(
    has_preference & dfCan2015$PartyPref_FoUp == party,
    dfCan2015[[win_chance_cols[party]]],
    dfCan2015$WinChaPref_FoUp
  )
}

## Generate the relative winning chance variable:

# This variable (which is used in the paper) measures the relative chances that
# voters assign to their most preferred party.
# First, the chances of all parties that voters are asked to evaluate are summed
# up. According to the author's notes, respondents in Ontario and BC evaluate
# only four parties (sum from 0 to 40), while respondents in Quebec also
# evaluate the Bloc Quebecois (sum from 0 to 50). A missing value for the Bloc
# therefore enters the sum as 0; a missing value for any other party makes the
# sum NA. In the second step, the win chance of the most preferred party is
# divided by this sum.
# NOTE(review): a sum of 0 yields a non-finite ratio (NaN or Inf).
bloc_chance <- ifelse(is.na(dfCan2015$Q28_party4), 0, dfCan2015$Q28_party4)

dfCan2015$WinChaSum <- dfCan2015$Q28_party1 + dfCan2015$Q28_party2 +
  dfCan2015$Q28_party3 + bloc_chance + dfCan2015$Q28_party5

dfCan2015$WinChaPref_Relative <- dfCan2015$WinChaPref_FoUp / dfCan2015$WinChaSum

###############################################################################################/
#### >>> IdeoDis_FoUp: Ideological Congruence (Perceived)                                  ####
###############################################################################################/


# This variable measures the absolute distance between a respondent's
# left-right self-placement (Q30_1) and where the respondent places the most
# preferred party (Q30_party1 ... Q30_party5).

# Set missing values (code 99) for the self-placement and the five party placements:
for (placement_col in c("Q30_1", paste0("Q30_party", 1:5))) {
  dfCan2015[[placement_col]][dfCan2015[[placement_col]] == 99] <- NA
}


## Generate variable IdeoDis (Ideological Discongruence) ##

# Based on single peak party preferences (ties broken using the follow-up
# question): for each party code, respondents preferring that party receive
# the signed difference between self-placement and party placement.
# Respondents without a party preference stay NA.
dfCan2015$IdeoDis_FoUp <- NA
for (party in 1:5) {
  party_placement <- dfCan2015[[paste0("Q30_party", party)]]
  dfCan2015$IdeoDis_FoUp <- ifelse(dfCan2015$PartyPref_FoUp == party,
                                   dfCan2015$Q30_1 - party_placement,
                                   dfCan2015$IdeoDis_FoUp)
}

# Drop the sign: only the size of the distance is kept.
dfCan2015$IdeoDis_FoUp <- abs(dfCan2015$IdeoDis_FoUp)


###############################################################################################/
#### >>> Comp_FoUp_Dummy: Issue competence (most preferred party)                           ####
###############################################################################################/

## Generate: Most preferred party equipped to handle most important
#            issue - competence (Comp) (Coding based on Weber 2011, 910 with slight alternative)
# 0=most preferred party equipped to deal with most important issue
# 1=no party equipped to deal with most important issue
# 2=other than most preferred party best equipped to deal with most important issue

## Based on single peak party preferences - ties broken using the follow-up question Q18.
## The rules run in this order, and a later rule overwrites an earlier one.
comp_code <- rep(NA_real_, nrow(dfCan2015))
comp_code[dfCan2015$Q3 == 2] <- 1    # No party capable (Q3)
comp_code[dfCan2015$Q3B == 99] <- 1  # No party mentioned (Q3B)

for (party in 1:5) {
  prefers_party <- dfCan2015$PartyPref_FoUp == party
  # Party named in Q3B is the most preferred party:
  comp_code[prefers_party & dfCan2015$Q3B == party] <- 0
  # Another party is named in Q3B (code 99 is excluded here):
  comp_code[prefers_party & dfCan2015$Q3B != party & dfCan2015$Q3B != 99] <- 2
}

# Codes are mapped to labels explicitly. Assigning labels by position after
# as.factor() would shift the labels if one of the codes did not occur.
dfCan2015$Comp_FoUp <- factor(comp_code,
                              levels = 0:2,
                              labels = c("Preference is most Competent",
                                         "No party Competent",
                                         "Other party most Competent"))

## The categories generated above are collapsed into the dummy that was used in the analyses:
# (Note: For the paper, the dummy variable was used because the paper did
# not focus on the role of attractiveness of alternative voting options. Hence, categories
# 1 and 2 are coded into a single category.)
dfCan2015$Comp_FoUp_Dummy <- factor(as.integer(comp_code > 0),
                                    levels = 0:1,
                                    labels = c("Preference is most Competent",
                                               "Preference is not most Competent"))
table(dfCan2015$Comp_FoUp_Dummy)
# Note: The Ns should be the following:
# Preference is most competent: 2,672
# Preference is not most competent: 1,466


############################################################/
#### /// 2.C  Data preparation - Covariates            ####
###########################################################/


###############################################################################################/
#### >>> OutcomeImportance: Importance of Election Results (Constituency Level)             ####
###############################################################################################/

# Measures how important the election outcome at the constituency level is to respondents.

## Generate variable: importance of election outcome in riding
# (0=don't care at all; 10=care a lot)
# Copy of Q26 in which the "don't know" code 99 is recoded into NA:
dfCan2015$OutcomeImportance <- replace(dfCan2015$Q26,
                                       which(dfCan2015$Q26 == 99),
                                       NA)

###############################################################################################/
#### >>> OutcomeImportance_National: Importance of Election Results (National Level)       ####
###############################################################################################/

# Measures how important the election outcome at the federal level is to respondents.


## Generate variable: importance of who forms government
# (0=don't care at all; 10=care a lot)
# Copy of Q20 in which the "don't know" code 99 is recoded into NA:
dfCan2015$OutcomeImportance_National <- replace(dfCan2015$Q20,
                                                which(dfCan2015$Q20 == 99),
                                                NA)


###############################################################################################/
#### >>> ExpCom_N: Expected Electoral Competition (Constituency Level)                      ####
###############################################################################################/

# Measures respondents' expected electoral competition at the constituency level.

## Generate Variable: Expected Closeness of the Election
# Q29 codes 1-4 are shifted to 0-3 (0 = Not very close at all, 3 = Very close);
# every other Q29 value, including missing, becomes NA.
dfCan2015$ExpCom_N <- c(0, 1, 2, 3)[match(dfCan2015$Q29, c(1, 2, 3, 4))]


###############################################################################################/
#### >>> ID_HighestRatedParty_FoUp: Identification with highest rated party                 ####
###############################################################################################/

# This variable measures whether or not respondents report to identify with the
# party they rate as their most preferred party. Categories:
# 0=not identifying with highest rated party
# 1=identifying with highest rated party
# Everybody starts at 0, so respondents with a missing party preference or a
# missing Q47_1 answer are coded 0 as well (not NA).

dfCan2015$ID_HighestRatedParty_FoUp <- 0
for (party in 1:5) {
  same_party <- dfCan2015$PartyPref_FoUp == party & dfCan2015$Q47_1 == party
  dfCan2015$ID_HighestRatedParty_FoUp[same_party] <- 1
}

dfCan2015$ID_HighestRatedParty_FoUp <- as.factor(dfCan2015$ID_HighestRatedParty_FoUp)
table(dfCan2015$ID_HighestRatedParty_FoUp)

# Ns should be:
# 0=3,845
# 1=1,764

###############################################################################################/
#### >>> PrefTypeFoUp: Which party is respondent's most preferred                          ####
###############################################################################################/

# Variable is a categorical variable that captures which of the parties is the
# respondent's most preferred party. The variable was used in one of the
# robustness checks.

## Generate: Most Preferred Party is the following:
# 1 = Conservative Party
# 2 = New Democratic Party
# 3 = Liberal Party
# 4 = Bloc Quebecois
# 5 = Green Party

# Ties in preferences are broken using the follow-up question (PartyPref_FoUp).
# The codes 1-5 are mapped to their labels explicitly, so a label can never be
# attached to the wrong party when one of the codes does not occur in the data.
# The accented characters of the Bloc label are written as Unicode escapes,
# which keeps the label intact regardless of the encoding the script file is
# read with.
# NOTE(review): the first label reads "Conservative Part" (missing "y"). It is
# left unchanged because other scripts may refer to this level by name -
# confirm before correcting it.
dfCan2015$PrefTypeFoUp <- factor(
  dfCan2015$PartyPref_FoUp,
  levels = 1:5,
  labels = c("Conservative Part", "New Democratic Party",
             "Liberal Party", "Bloc Qu\u00e9b\u00e9cois",
             "Green Party")
)


###############################################################################################/
#### >>> LeaderPartyPref_CongruenceCat_FoUp: Leader Rating                                 ####
###############################################################################################/



## Variable measures how respondents rate the leader of their most preferred party: ##
# 0 = Leader of most preferred party is either rated as highest leader or among the highest rated leaders
#     (in case of ties)
# 1 = Favorite leader is NOT leader of most preferred party

# The five leaders are:
# Stephen Harper (Conservative Party; Q19_1)
# Thomas Mulcair (NDP, Q19_2)
# Justin Trudeau (LP, Q19_3)
# Gilles Duceppe (BQ, Q19_4)
# Elizabeth May (Green Party, Q19_5)

# The five leader rating columns are addressed by name instead of by column
# position, so the result does not depend on the column order of the loaded
# data frame.
leader_rating_cols <- paste0("Q19_", 1:5)

# Recode missings for leader ratings: every value above 11 is set to NA
# (per the original note: 98=Don't know the leader; 99=Don't know).
for (leader_col in leader_rating_cols) {
  dfCan2015[[leader_col]][dfCan2015[[leader_col]] > 11] <- NA
}


# First, identify the leader that respondents rate highest (for an R solution
# to identifying the maximum value in given columns, see here:
# https://stackoverflow.com/questions/17735859/for-each-row-return-the-column-name-of-the-largest-value
# last accessed, March 1, 2018).

# Print the column numbers of the first and last leader rating (informational only):
which(colnames(dfCan2015) == "Q19_1")
which(colnames(dfCan2015) == "Q19_5")

leader_ratings <- dfCan2015[leader_rating_cols]

# TRUE for respondents who did not rate a single leader:
no_leader_rated <- rowSums(!is.na(leader_ratings)) == 0

## Identify highest rated leader (ties are broken randomly - will be recoded further below).
## Missing ratings are replaced by -Inf so they can never be the row maximum;
## respondents without any rating are set to NA afterwards.
top_leader <- max.col(replace(leader_ratings, is.na(leader_ratings), -Inf),
                      ties.method = "random")
top_leader[no_leader_rated] <- NA
dfCan2015$LeaderPref_string <- as.numeric(top_leader)

## Generate Variable: Maximum rating given to any leader (NA if no leader was rated).
# The all-NA rows are handled explicitly, which avoids the warning and the
# -Inf result that max(..., na.rm = TRUE) produces for an empty set of values.
#http://r.789695.n4.nabble.com/Calculating-a-Maximum-for-a-row-or-column-with-NA-s-td2014630.html
dfCan2015$LeaderPref_Max <- apply(leader_ratings, 1, function(row_ratings) {
  if (all(is.na(row_ratings))) NA_real_ else max(row_ratings, na.rm = TRUE)
})


## Generate Variable for respondents who are NA for rating variables of ALL leaders:
dfCan2015$LeaderLikeDislike_NA_Only <- no_leader_rated


## Identify cases in which respondents did not have a single peak preference
## but gave their maximum rating to at least two leaders:
## Dupl_Leader = 1 marks a tie at the top; every other respondent stays NA.
dfCan2015$Dupl_Leader <- NA

# For each leader, flag the respondents whose (non-missing) rating equals their
# maximum rating, then count these flags per respondent. Checking every pair
# of leaders for "equal and at the maximum" is the same as asking whether at
# least two leaders sit at the maximum.
n_leaders_at_max <- Reduce(
  `+`,
  lapply(paste0("Q19_", 1:5), function(leader_col) {
    rating <- dfCan2015[[leader_col]]
    !is.na(rating) & !is.na(dfCan2015$LeaderPref_Max) &
      rating == dfCan2015$LeaderPref_Max
  })
)

dfCan2015$Dupl_Leader <- ifelse(n_leaders_at_max >= 2, 1, dfCan2015$Dupl_Leader)

# Recode all cases in which respondents did not evaluate a single leader to NA:
dfCan2015$Dupl_Leader[dfCan2015$LeaderLikeDislike_NA_Only %in% TRUE] <- NA

# Generate LeaderPref_Random: copies the codes 1 to 5 found in
# LeaderPref_string (1 = leader of party 1, 2 = leader of party 2, etc.);
# every other value is left as NA.
dfCan2015$LeaderPref_Random <- local({
  coded <- rep(NA, nrow(dfCan2015))
  for (leader_code in c(1, 2, 3, 4, 5)) {
    coded[dfCan2015$LeaderPref_string == leader_code] <- leader_code
  }
  coded
})

# Generate LeaderPref: same codes as LeaderPref_Random, except that
# respondents flagged in Dupl_Leader receive code 9 ("TIED LEADER RATING").
dfCan2015$LeaderPref <- local({
  pref <- dfCan2015$LeaderPref_Random
  pref[dfCan2015$Dupl_Leader == 1] <- 9
  pref
})

# Generate LeaderPartyPref_CongruenceCat_FoUp (factor), codes:
#   0. Most preferred leader is leader of most preferred party, or the
#      maximum leader ratings are tied (LeaderPref == 9)
#   1. Most preferred leader is NOT leader of most preferred party
# Rows where the comparison cannot be made (missing values) stay NA unless
# they are ties.
dfCan2015$LeaderPartyPref_CongruenceCat_FoUp <- local({
  mismatch <- ifelse(dfCan2015$LeaderPref != dfCan2015$PartyPref_FoUp, 1, 0)
  mismatch[dfCan2015$LeaderPref == 9] <- 0
  as.factor(mismatch)
})


###############################################################################################/
#### >>> LocalLeader_Congruence_FoUp: Local Leader Rating                                 ####
###############################################################################################/


## Local Leader Congruence with Party Rating (follow-up party preference):

# 0= Favorite Local Leader is from preferred party/No Favorite Local Leader
# 1= Favorite Local Leader is NOT from preferred party
# NA= congruence cannot be determined (missing party preference or missing
#     Q27A, and Q27 is not 2)

# Set Missings ("don't know" answers):
dfCan2015$Q27[dfCan2015$Q27==9] <- NA #DK to missing
# The DK code 99 belongs to Q27A, so Q27A itself has to be recoded. Previously
# this line overwrote Q27 instead, which left 99 in Q27A and caused those
# respondents to be coded 1 (not congruent) below rather than missing.
dfCan2015$Q27A[dfCan2015$Q27A==99] <- NA #DK to missing

dfCan2015$LocalLeader_Congruence_FoUp <- NA
dfCan2015$LocalLeader_Congruence_FoUp[dfCan2015$PartyPref_FoUp==dfCan2015$Q27A] <- 0
dfCan2015$LocalLeader_Congruence_FoUp[dfCan2015$PartyPref_FoUp!=dfCan2015$Q27A] <- 1
# Q27 == 2 is treated as "no favorite local leader" and coded 0:
dfCan2015$LocalLeader_Congruence_FoUp[dfCan2015$Q27==2] <- 0
dfCan2015$LocalLeader_Congruence_FoUp <- as.factor(dfCan2015$LocalLeader_Congruence_FoUp)


###############################################################################################/
#### >>> Demographics: Education, Age, Gender, Province                                   ####
###############################################################################################/


##Education (Dummy for Postsecondary Education), copied from POSTSECONDARY:
dfCan2015$Edu <- dfCan2015$POSTSECONDARY

##Age: copied unchanged from the dataset's `age` variable (described in this
## script as year of election minus year of birth; no computation is done here).
dfCan2015$Age <- dfCan2015$age

##Gender (Male=0, Female=1): gend==1 is recoded to 0, gend==2 to 1; any other
## value becomes NA.
dfCan2015$Gender <- NA
dfCan2015$Gender[dfCan2015$gend==1] <- 0
dfCan2015$Gender[dfCan2015$gend==2] <- 1

# Attach the labels to explicit codes. Relabelling via levels(x) <- c(...) is
# positional and would silently label code 1 as "Male" if code 0 were absent
# from the data.
dfCan2015$Gender <- factor(dfCan2015$Gender,
                           levels = c(0, 1),
                           labels = c("Male", "Female"))

##Province Categorical Variable: factor built from ELECID codes 1, 2 and 3;
## any other ELECID value becomes NA.
## NOTE(review): the labels assigned below map 1=Quebec, 2=British Columbia,
## 3=Ontario, whereas this script previously described the coding as
## "1=BC, 2=Ontario, 3=Quebec". The mapping used by the code is kept unchanged
## here; confirm the ELECID coding against the MEDW codebook.

dfCan2015$Province <- NA
dfCan2015$Province[dfCan2015$ELECID==1] <- 1
dfCan2015$Province[dfCan2015$ELECID==2] <- 2
dfCan2015$Province[dfCan2015$ELECID==3] <- 3

# Attach the labels to explicit codes. Relabelling via levels(x) <- c(...) is
# positional and would silently shift the labels if one of the three codes
# were absent from the data.
dfCan2015$Province <- factor(dfCan2015$Province,
                             levels = c(1, 2, 3),
                             labels = c("Quebec",
                                        "British Columbia",
                                        "Ontario"))




######################## END OF RSCRIPT #######################/